AddN

对多个输入张量进行逐元素相加，并将结果输出；支持标量广播。

数学表达式为:

\[\text{output} = \sum_{i=0}^{tensor\_num-1}\text{inputs[i]}\]
输入:
  • inputs - 输入数据地址数组。

  • tensor_num - 输入张量数量。

  • element_nums - 各输入张量的元素数量数组(element_nums[i] 对应 inputs[i] 的元素数量)。

  • core_mask - 核掩码(仅适用于共享存储版本)。

输出:
  • output - 计算结果地址。

支持平台:

FT78NE、MT7004

备注:

  • FT78NE 支持 fp32、fp64、int8、int16、int32、cplx64、cplx128 类型

  • MT7004 支持 fp16、fp32、int16、int32、cplx64 类型

共享存储版本:

void hp_addn_s(half **inputs, half *output, int tensor_num, int *element_nums, int core_mask)
void fp_addn_s(float **inputs, float *output, int tensor_num, int *element_nums, int core_mask)
void dp_addn_s(double **inputs, double *output, int tensor_num, int *element_nums, int core_mask)
void i8_addn_s(int8_t **inputs, int8_t *output, int tensor_num, int *element_nums, int core_mask)
void i16_addn_s(int16_t **inputs, int16_t *output, int tensor_num, int *element_nums, int core_mask)
void i32_addn_s(int32_t **inputs, int32_t *output, int tensor_num, int *element_nums, int core_mask)
void c64_addn_s(float **inputs, float *output, int tensor_num, int *element_nums, int core_mask)
void c128_addn_s(double **inputs, double *output, int tensor_num, int *element_nums, int core_mask)

C调用示例:

 1// FT78NE 示例
 2#include <stdio.h>
 3#include <addn.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *input0 = (float *)0xA0000000;
 7    float *input1 = (float *)0xA0010000;
 8    float **inputs = (float **)0xB0000000;
 9    inputs[0] = input0;
10    inputs[1] = input1;
11    float *output = (float *)0xC0000000;
12    int tensor_num = 2;
13    int *element_nums = (int *)0xB0001000;
14    element_nums[0] = 1024;
15    element_nums[1] = 1024;
16    int core_mask = 0xff;
17    fp_addn_s(inputs, output, tensor_num, element_nums, core_mask);
18    return 0;
19}

私有存储版本:

void hp_addn_p(half **inputs, half *output, int tensor_num, int *element_nums)
void fp_addn_p(float **inputs, float *output, int tensor_num, int *element_nums)
void dp_addn_p(double **inputs, double *output, int tensor_num, int *element_nums)
void i8_addn_p(int8_t **inputs, int8_t *output, int tensor_num, int *element_nums)
void i16_addn_p(int16_t **inputs, int16_t *output, int tensor_num, int *element_nums)
void i32_addn_p(int32_t **inputs, int32_t *output, int tensor_num, int *element_nums)
void c64_addn_p(float **inputs, float *output, int tensor_num, int *element_nums)
void c128_addn_p(double **inputs, double *output, int tensor_num, int *element_nums)

C调用示例:

 1// FT78NE 示例
 2#include <stdio.h>
 3#include <addn.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *input0 = (float *)0x10810000;   // L2 空间
 7    float *input1 = (float *)0x10814000;
 8    float **inputs = (float **)0x10818000;
 9    inputs[0] = input0;
10    inputs[1] = input1;
11    float *output = (float *)0x10820000;
12    int tensor_num = 2;
13    int *element_nums = (int *)0x1082C000;
14    element_nums[0] = 1024;
15    element_nums[1] = 1024;
16    fp_addn_p(inputs, output, tensor_num, element_nums);
17    return 0;
18}